
# Supp Figure 1 - GSB Plasmodium


library(plyr)
library(dplyr)
library(stringr)
library(useful)
library("data.table")
library("conflicted")
library(reshape2)
conflict_prefer("mutate", "dplyr")
suppressPackageStartupMessages(library("tidyverse"))
conflict_prefer("filter", "dplyr")
library(ggplot2)
library(epiDisplay)
library(gmodels)

source('D:/Pipeline comparisons/Writing/R Functions/Binomial Scores Function Peptidoform level.R')
source('D:/Pipeline comparisons/Writing/R Functions/GBS_Function.R')
source('D:/Pipeline comparisons/Writing/R Functions/FLR function Bin Adjusted.R')
source('D:/Pipeline comparisons/Writing/R Functions/Function frequency of site.R')


# TPP #
#######


# Load every TPP result table for the Plasmodium runs and tag each table with
# its dataset accession.
# Names are the objects created in the workspace; values are the CSV file
# stems inside the TPP directory.
tpp_in_dir <- 'D:/Pipeline comparisons/Plasmodium/TPP/'
tpp_in_stems <- c(
  PXD001684A    = 'PXD001684',
  PXD002266A    = 'PXD002266',
  PXD005207A_1  = 'PXD005207_Plasmo',
  PXD005207A_2  = 'PXD005207_Plfalci',
  PXD005207A_3  = 'PXD005207_SigMix',
  PXD005207A_4  = 'PXD005207_SigR1',
  PXD005207A_5  = 'PXD005207_SigR2',
  PXD005207A_6  = 'PXD005207_SigR3',
  PXD005207A_7  = 'PXD005207_SigR4',
  PXD005207A_8  = 'PXD005207_SigR5',
  PXD005207A_9  = 'PXD005207_StageR1',
  PXD005207A_10 = 'PXD005207_StageR4',
  PXD005207A_11 = 'PXD005207_StageR5',
  PXD009157A_1  = 'PXD009157_1',
  PXD009157A_2  = 'PXD009157_2',
  PXD009465A    = 'PXD009465',
  PXD026474A_1  = 'PXD026474_KO',
  PXD026474A_2  = 'PXD026474_WT'
)

for (tpp_in_obj in names(tpp_in_stems)) {
  tpp_in_tab <- read.csv(file = paste0(tpp_in_dir, tpp_in_stems[[tpp_in_obj]], '.csv'))
  # The accession is the first nine characters of the object name
  # (e.g. "PXD005207A_3" -> "PXD005207").
  tpp_in_tab$dataset <- substr(tpp_in_obj, 1, 9)
  assign(tpp_in_obj, tpp_in_tab)
}

# Drop the loop helpers so only the data tables remain in the workspace.
rm(tpp_in_dir, tpp_in_stems, tpp_in_obj, tpp_in_tab)

# pASTY peptidoform level #
##########################


# Collapse each TPP table to peptidoform level (binAdjustPform) and then add
# the adjusted FLR columns (FLR_AdjTPP). Results are stored as <run>_pform.
tpp_pf_runs <- c(
  "PXD001684A", "PXD002266A",
  paste0("PXD005207A_", seq_len(11)),
  "PXD009157A_1", "PXD009157A_2",
  "PXD009465A",
  "PXD026474A_1", "PXD026474A_2"
)

# plyr is attached only for the binAdjustPform calls and detached again before
# FLR_AdjTPP runs (presumably to keep plyr from masking dplyr verbs there).
library(plyr)

for (tpp_pf_run in tpp_pf_runs) {
  assign(paste0(tpp_pf_run, "_pform"), binAdjustPform(get(tpp_pf_run)))
}

detach(package:plyr)

for (tpp_pf_run in tpp_pf_runs) {
  tpp_pf_name <- paste0(tpp_pf_run, "_pform")
  assign(tpp_pf_name, FLR_AdjTPP(get(tpp_pf_name)))
}

rm(tpp_pf_runs, tpp_pf_run, tpp_pf_name)

# Remove the position helper columns from every TPP peptidoform table
# (result: <run>_pformc) and stack all runs into one table.
tpp_dc_drop <- c("PROTEIN_POS", "PRO_pos_list", "PTM_length", "PTM_beg2",
                 "PTM_end2", "PTM_End", "PTM_Beginning")
tpp_dc_runs <- c(
  "PXD001684A", "PXD002266A",
  paste0("PXD005207A_", seq_len(11)),
  "PXD009157A_1", "PXD009157A_2",
  "PXD009465A",
  "PXD026474A_1", "PXD026474A_2"
)

for (tpp_dc_run in tpp_dc_runs) {
  tpp_dc_tab <- get(paste0(tpp_dc_run, "_pform"))
  # Columns to keep are always taken from the table's own names, so a run can
  # never be subset with another run's column list.
  assign(paste0(tpp_dc_run, "_pformc"),
         tpp_dc_tab[setdiff(names(tpp_dc_tab), tpp_dc_drop)])
}

# Every run contributes exactly once (including PXD005207A_11); the list names
# are ignored by bind_rows because no .id column is requested.
tpp_dc_tabs <- mget(paste0(tpp_dc_runs, "_pformc"))
AllPlasmodium_pASTY_pform <- dplyr::bind_rows(tpp_dc_tabs)

rm(tpp_dc_drop, tpp_dc_runs, tpp_dc_run, tpp_dc_tab, tpp_dc_tabs)


# Per-dataset row counts, leaving out rows whose Amino is "A".
AllPlasmodium_pASTY_pform_Excluding_A <- AllPlasmodium_pASTY_pform[AllPlasmodium_pASTY_pform$Amino!="A",]

tab1(AllPlasmodium_pASTY_pform_Excluding_A$dataset)



# Cut every TPP peptidoform table at 1% and 5% adjusted FLR.
# For each run the rows from the first row through the LAST row whose
# FLR_Adj_Score is at or below the cutoff are kept (rows in between are kept
# regardless of their own score). Results: <run>_pform_01 and <run>_pform_05.
#
# PXD005207A_11 is deliberately not processed here (it was commented out in
# the original script). NOTE(review): presumably no row of that run passes the
# cutoff, in which case max(which(...)) would fail - confirm.
tpp_cut_runs <- c(
  "PXD001684A", "PXD002266A",
  paste0("PXD005207A_", seq_len(10)),
  "PXD009157A_1", "PXD009157A_2",
  "PXD009465A",
  "PXD026474A_1", "PXD026474A_2"
)
tpp_cut_levels <- c("_01" = 0.01, "_05" = 0.05)

for (tpp_cut_suffix in names(tpp_cut_levels)) {
  for (tpp_cut_run in tpp_cut_runs) {
    tpp_cut_tab <- get(paste0(tpp_cut_run, "_pformc"))
    tpp_cut_last <- max(which(tpp_cut_tab$FLR_Adj_Score <= tpp_cut_levels[[tpp_cut_suffix]]))
    assign(paste0(tpp_cut_run, "_pform", tpp_cut_suffix),
           tpp_cut_tab[1:tpp_cut_last, ])
  }
}

# Stack the per-run cuts, one combined table per FLR level.
tpp_cut_tabs_01 <- mget(paste0(tpp_cut_runs, "_pform_01"))
AllPlasmodium_pASTY_pform_01 <- dplyr::bind_rows(tpp_cut_tabs_01)

tpp_cut_tabs_05 <- mget(paste0(tpp_cut_runs, "_pform_05"))
AllPlasmodium_pASTY_pform_05 <- dplyr::bind_rows(tpp_cut_tabs_05)

rm(tpp_cut_runs, tpp_cut_levels, tpp_cut_suffix, tpp_cut_run,
   tpp_cut_tab, tpp_cut_last, tpp_cut_tabs_01, tpp_cut_tabs_05)

# Per-dataset counts at 1% FLR, leaving out rows whose Amino is "A".
AllPlasmodium_pASTY_pform_01_Excluding_A <- AllPlasmodium_pASTY_pform_01[AllPlasmodium_pASTY_pform_01$Amino!="A",]

tab1(AllPlasmodium_pASTY_pform_01_Excluding_A$dataset)

# Build the 2.5% FLR table for TPP here. The TPP section never created it, so
# the summary below either failed with "object not found" or silently used a
# table left over from another pipeline. Same cut rule as the 1% / 5% tables:
# keep rows up to the last one with FLR_Adj_Score <= 0.025. PXD005207A_11 is
# left out, matching the 1% and 5% tables.
tpp_sum_runs <- c(
  "PXD001684A", "PXD002266A",
  paste0("PXD005207A_", seq_len(10)),
  "PXD009157A_1", "PXD009157A_2",
  "PXD009465A",
  "PXD026474A_1", "PXD026474A_2"
)
tpp_sum_tabs <- mget(paste0(tpp_sum_runs, "_pformc"))
AllPlasmodium_pASTY_pform_02.5 <- dplyr::bind_rows(lapply(
  tpp_sum_tabs,
  function(tab) tab[1:max(which(tab$FLR_Adj_Score <= 0.025)), ]
))
rm(tpp_sum_runs, tpp_sum_tabs)

# Per-dataset counts at 2.5% FLR.
AllPlasmodium_pASTY_pform_02.5_Excluding_A <- AllPlasmodium_pASTY_pform_02.5[AllPlasmodium_pASTY_pform_02.5$Amino!="A",]

tab1(AllPlasmodium_pASTY_pform_02.5_Excluding_A$dataset)

# Per-dataset counts at 5% FLR.
AllPlasmodium_pASTY_pform_05_Excluding_A <- AllPlasmodium_pASTY_pform_05[AllPlasmodium_pASTY_pform_05$Amino!="A",]

tab1(AllPlasmodium_pASTY_pform_05_Excluding_A$dataset)

# Copy the adjusted FLR into New_FLR_PEP on the combined tables
# (presumably the column name GSB_Function reads - confirm in GBS_Function.R).
AllPlasmodium_pASTY_pform$New_FLR_PEP <-AllPlasmodium_pASTY_pform$FLR_Adj_Score
AllPlasmodium_pASTY_pform_01$New_FLR_PEP <-AllPlasmodium_pASTY_pform_01$FLR_Adj_Score
AllPlasmodium_pASTY_pform_05$New_FLR_PEP <-AllPlasmodium_pASTY_pform_05$FLR_Adj_Score

# Categorise sites from the 1% and 5% tables; the result carries the `cat`
# and `Amino` columns used by the tables and plots that follow.
AllPlasmodium_pASTY_pform_Final <- GSB_Function(AllPlasmodium_pASTY_pform_01,AllPlasmodium_pASTY_pform_05)

str(AllPlasmodium_pASTY_pform_Final)


# Tyrosine rows of the TPP result whose category is anything but Bronze.
tpp_plot_is_tyr <- AllPlasmodium_pASTY_pform_Final$Amino == "Y"
tpp_plot_not_bronze <- AllPlasmodium_pASTY_pform_Final$cat != "Bronze"
PlasmodiumY <- AllPlasmodium_pASTY_pform_Final[tpp_plot_is_tyr & tpp_plot_not_bronze, ]


# Category x amino-acid contingency table (printed).
CrossTable(AllPlasmodium_pASTY_pform_Final$cat, AllPlasmodium_pASTY_pform_Final$Amino)

# Hand-entered counts for the stacked bar chart. Each category label carries
# its total; the values are listed amino acid by amino acid in the order
# Gold, Silver, Bronze.
tpp_plot_levels <- c("Gold:1166", "Silver:5639", "Bronze:5052")
df2 <- data.frame(
  Amino = rep(c("S", "T", "Y", "A"), each = 3),
  Level = rep(tpp_plot_levels, times = 4),
  Unique_sites = c(
    1056, 4783, 4007,   # S
    108, 806, 802,      # T
    NA, 36, 115,        # Y (Gold is 2; left NA here)
    NA, 14, 128         # A
  )
)

# Insert the Y/Gold value (2) manually into the finished figure.

tpp_plot_aes <- aes(x = Level, y = Unique_sites, fill = Amino)

# Quick look: categories appear in alphabetical order here.
ggplot(data = df2, mapping = tpp_plot_aes) +
  geom_bar(stat = "identity")

# Final figure: rows sorted by count and categories forced into
# Gold / Silver / Bronze order.
tpp_plot_df <- dplyr::arrange(df2, Unique_sites)
tpp_plot_df$Level <- factor(tpp_plot_df$Level, levels = tpp_plot_levels)

p <- ggplot(tpp_plot_df, tpp_plot_aes) +
  geom_bar(stat = "identity") +
  xlab("")

tpp_plot_labels <- geom_text(aes(label = Unique_sites),
                             position = position_stack(vjust = 0.8), size = 5)
p + tpp_plot_labels +
  theme(text = element_text(size = 20))

rm(tpp_plot_is_tyr, tpp_plot_not_bronze, tpp_plot_levels, tpp_plot_aes,
   tpp_plot_df, tpp_plot_labels)


# Save the TPP categorised site table.
write.csv(AllPlasmodium_pASTY_pform_Final, "D:/Pipeline comparisons/Writing/Data/GSB/Plasmodium/TPP_GSB.csv", row.names=FALSE)

# Save every per-run peptidoform table (<run>_pformc) as
# TPP_<run>_pform.csv, without row names.
tpp_out_dir <- "D:/Pipeline comparisons/Writing/Data/TPP/Plasmodium/pform/"
tpp_out_runs <- c(
  "PXD001684A", "PXD002266A",
  paste0("PXD005207A_", seq_len(11)),
  "PXD009157A_1", "PXD009157A_2",
  "PXD009465A",
  "PXD026474A_1", "PXD026474A_2"
)

for (tpp_out_run in tpp_out_runs) {
  write.csv(get(paste0(tpp_out_run, "_pformc")),
            paste0(tpp_out_dir, "TPP_", tpp_out_run, "_pform.csv"),
            row.names = FALSE)
}

rm(tpp_out_dir, tpp_out_runs, tpp_out_run)

# MQ #
######


source('D:/Pipeline comparisons/Writing/R Functions/MQ Binomial Scores Function Peptidoform level MQ.R')
source('D:/Pipeline comparisons/Writing/R Functions/MQ FLR function Bin Adjusted MQ.R')
source('D:/Pipeline comparisons/Writing/R Functions/Function frequency of site MaxQuant.R')



# Load the MaxQuant PSM/site tables, tag each with its dataset accession and
# add a Unique_scan key.
# Names are the objects created in the workspace; values are the input files.
# The first object keeps its historical name PXD001864A (later code refers to
# it), although the dataset it holds is PXD001684.
mq_in_dir <- 'D:/Pipeline comparisons/Writing/Data/MQ/Plasmodium/'
mq_in_files <- c(
  PXD001864A   = 'MQ_PXD001684_A_PSMSITE.csv',
  PXD002266A   = 'MQ_PXD002266_A_PSMSITE.csv',
  PXD005207A   = 'MQ_PXD005207_A_PSMSITE.csv',
  PXD009157A_1 = 'MQ_PXD009157_1_A_PSMSITE.csv',
  PXD009157A_2 = 'MQ_PXD009157_2_A_PSMSITE.csv',
  PXD009465A   = 'MQ_PXD009465_A_PSMSITE.csv',
  PXD026474A   = 'MQ_PXD026474_A_PSMSITE.csv'
)

for (mq_in_obj in names(mq_in_files)) {
  mq_in_tab <- read.csv(file = paste0(mq_in_dir, mq_in_files[[mq_in_obj]]))

  # Take the accession from the file name (characters 4-12, right after
  # "MQ_") rather than from the object name, so the label always matches the
  # data that was read: the first table is labelled "PXD001684", consistent
  # with the TPP and PD sections, instead of the transposed "PXD001864".
  mq_in_tab$dataset <- substr(mq_in_files[[mq_in_obj]], 4, 12)

  # Scan key: MS/MS scan number(s) and retention time joined by "_".
  mq_in_tab$Unique_scan <- paste0(mq_in_tab$MS.MS.scan.numbers, "_", mq_in_tab$Retention.time)

  assign(mq_in_obj, mq_in_tab)
}

rm(mq_in_dir, mq_in_files, mq_in_obj, mq_in_tab)


# Collapse each MaxQuant table to peptidoform level (binAdjustPformMQ) and
# then add the adjusted FLR columns (FLR_AdjMQ). Results: <run>_pform.
mq_pf_runs <- c("PXD001864A", "PXD002266A", "PXD005207A",
                "PXD009157A_1", "PXD009157A_2",
                "PXD009465A", "PXD026474A")

# plyr is attached only while binAdjustPformMQ runs and is detached before
# the FLR step.
library(plyr)

for (mq_pf_run in mq_pf_runs) {
  assign(paste0(mq_pf_run, "_pform"), binAdjustPformMQ(get(mq_pf_run)))
}

detach(package:plyr)

for (mq_pf_run in mq_pf_runs) {
  mq_pf_name <- paste0(mq_pf_run, "_pform")
  assign(mq_pf_name, FLR_AdjMQ(get(mq_pf_name)))
}

rm(mq_pf_runs, mq_pf_run, mq_pf_name)



# --- MaxQuant: combine runs, cut by FLR, summarise, categorise ---------------
mq_cut_runs <- c("PXD001864A", "PXD002266A", "PXD005207A",
                 "PXD009157A_1", "PXD009157A_2",
                 "PXD009465A", "PXD026474A")

# All peptidoforms from every run, uncut.
mq_cut_all <- mget(paste0(mq_cut_runs, "_pform"))
AllPlasmodium_pASTY_pform <- dplyr::bind_rows(mq_cut_all)

AllPlasmodium_pASTY_pform_Excluding_A <- AllPlasmodium_pASTY_pform[AllPlasmodium_pASTY_pform$Amino!="A",]

tab1(AllPlasmodium_pASTY_pform_Excluding_A$dataset)

# Cut each run at 1%, 2.5% and 5% adjusted FLR: rows from the first row
# through the last row with FLR_Adj_Score at or below the cutoff are kept.
# Results: <run>_pform_01, <run>_pform_02.5, <run>_pform_05.
mq_cut_levels <- c("_01" = 0.01, "_02.5" = 0.025, "_05" = 0.05)

for (mq_cut_suffix in names(mq_cut_levels)) {
  for (mq_cut_run in mq_cut_runs) {
    mq_cut_tab <- get(paste0(mq_cut_run, "_pform"))
    mq_cut_last <- max(which(mq_cut_tab$FLR_Adj_Score <= mq_cut_levels[[mq_cut_suffix]]))
    assign(paste0(mq_cut_run, "_pform", mq_cut_suffix),
           mq_cut_tab[1:mq_cut_last, ])
  }
}

# One combined table per FLR level.
mq_cut_tabs_01 <- mget(paste0(mq_cut_runs, "_pform_01"))
AllPlasmodium_pASTY_pform_01 <- dplyr::bind_rows(mq_cut_tabs_01)

mq_cut_tabs_025 <- mget(paste0(mq_cut_runs, "_pform_02.5"))
AllPlasmodium_pASTY_pform_02.5 <- dplyr::bind_rows(mq_cut_tabs_025)

mq_cut_tabs_05 <- mget(paste0(mq_cut_runs, "_pform_05"))
AllPlasmodium_pASTY_pform_05 <- dplyr::bind_rows(mq_cut_tabs_05)

rm(mq_cut_runs, mq_cut_all, mq_cut_levels, mq_cut_suffix, mq_cut_run,
   mq_cut_tab, mq_cut_last, mq_cut_tabs_01, mq_cut_tabs_025, mq_cut_tabs_05)

# Versions of the cut tables without the rows whose Amino is "A".
AllPlasmodium_pASTY_pform_01_Excluding_A <- AllPlasmodium_pASTY_pform_01[AllPlasmodium_pASTY_pform_01$Amino!="A",]
AllPlasmodium_pASTY_pform_02.5_Excluding_A <- AllPlasmodium_pASTY_pform_02.5[AllPlasmodium_pASTY_pform_02.5$Amino!="A",]
AllPlasmodium_pASTY_pform_05_Excluding_A <- AllPlasmodium_pASTY_pform_05[AllPlasmodium_pASTY_pform_05$Amino!="A",]

# Per-dataset counts at 1%, 2.5% and 5% FLR.
tab1(AllPlasmodium_pASTY_pform_01_Excluding_A$dataset)
tab1(AllPlasmodium_pASTY_pform_02.5_Excluding_A$dataset)
tab1(AllPlasmodium_pASTY_pform_05_Excluding_A$dataset)

# Copy the adjusted FLR into New_FLR_PEP on the combined tables.
AllPlasmodium_pASTY_pform$New_FLR_PEP <- AllPlasmodium_pASTY_pform$FLR_Adj_Score
AllPlasmodium_pASTY_pform_01$New_FLR_PEP <- AllPlasmodium_pASTY_pform_01$FLR_Adj_Score
AllPlasmodium_pASTY_pform_05$New_FLR_PEP <- AllPlasmodium_pASTY_pform_05$FLR_Adj_Score

# Categorise sites from the 1% and 5% tables.
AllPlasmodium_pASTY_pform_Final <- GSB_Function(AllPlasmodium_pASTY_pform_01,AllPlasmodium_pASTY_pform_05)

str(AllPlasmodium_pASTY_pform_Final)


# Category x amino-acid contingency table for MaxQuant (printed).
CrossTable(AllPlasmodium_pASTY_pform_Final$cat, AllPlasmodium_pASTY_pform_Final$Amino)

# Hand-entered counts for the stacked bar chart. Each category label carries
# its total; the values are listed amino acid by amino acid in the order
# Gold, Silver, Bronze.
mq_plot_levels <- c("Gold:1343", "Silver:5813", "Bronze:6464")
df2 <- data.frame(
  Amino = rep(c("S", "T", "Y", "A"), each = 3),
  Level = rep(mq_plot_levels, times = 4),
  Unique_sites = c(
    1232, 4978, 5259,   # S
    109, 805, 1047,     # T
    NA, 26, 69,         # Y (Gold left NA: the number overlaps, add it manually)
    NA, 4, 89           # A
  )
)
# NOTE(review): the original inline note on the Y/Gold value read "18", while
# the note before the plot says to insert 2 manually in Gold; the Gold total
# in the label (1343) minus S (1232) and T (109) leaves 2 - confirm.

mq_plot_aes <- aes(x = Level, y = Unique_sites, fill = Amino)

# Quick look: categories appear in alphabetical order here.
ggplot(data = df2, mapping = mq_plot_aes) +
  geom_bar(stat = "identity")

# Final figure: rows sorted by count and categories forced into
# Gold / Silver / Bronze order.
mq_plot_df <- dplyr::arrange(df2, Unique_sites)
mq_plot_df$Level <- factor(mq_plot_df$Level, levels = mq_plot_levels)

p <- ggplot(mq_plot_df, mq_plot_aes) +
  geom_bar(stat = "identity") +
  xlab("")

mq_plot_labels <- geom_text(aes(label = Unique_sites),
                            position = position_stack(vjust =  0.7), size = 5)
p + mq_plot_labels +
  theme(text = element_text(size = 20))

rm(mq_plot_levels, mq_plot_aes, mq_plot_df, mq_plot_labels)

# Save the MaxQuant categorised site table.
write.csv(AllPlasmodium_pASTY_pform_Final, "D:/Pipeline comparisons/Writing/Data/GSB/Plasmodium/MQ_GSB.csv", row.names=FALSE)


# Save every per-run MaxQuant peptidoform table (<object>_pform).
# Names are the workspace objects; values are the output file names. The first
# object is called PXD001864A in this script but holds dataset PXD001684,
# which is what its file name uses.
mq_out_dir <- "D:/Pipeline comparisons/Writing/Data/MQ/Plasmodium/pform/"
mq_out_files <- c(
  PXD001864A   = "MQ_PXD001684A_pform.csv",
  PXD002266A   = "MQ_PXD002266A_pform.csv",
  PXD005207A   = "MQ_PXD005207A_pform.csv",
  PXD009157A_1 = "MQ_PXD009157A_1_pform.csv",
  PXD009157A_2 = "MQ_PXD009157A_2_pform.csv",
  PXD009465A   = "MQ_PXD009465A_pform.csv",
  PXD026474A   = "MQ_PXD026474A_pform.csv"
)

for (mq_out_obj in names(mq_out_files)) {
  # row.names = FALSE matches the TPP and PD writers; without it each file
  # gains an extra unnamed index column.
  write.csv(get(paste0(mq_out_obj, "_pform")),
            paste0(mq_out_dir, mq_out_files[[mq_out_obj]]),
            row.names = FALSE)
}

rm(mq_out_dir, mq_out_files, mq_out_obj)

# PD #
######


source('D:/Pipeline comparisons/Writing/R Functions/PD Binomial Scores Function Peptidoform level PD.R')
source('D:/Pipeline comparisons/Writing/R Functions/PD FLR function Bin Adjusted PD.R')
source('D:/Pipeline comparisons/Writing/R Functions/Function frequency of site PDiscoverer.R')



# Load the Proteome Discoverer PSM/site tables, tag each with its dataset
# accession and compute PTM_Final_prob. These objects replace the TPP tables
# of the same name loaded earlier in the script.
pd_in_dir <- 'D:/Pipeline comparisons/Writing/Data/PD/Plasmodium/'
pd_in_runs <- c("PXD001684A", "PXD002266A", "PXD005207A",
                "PXD009157A_1", "PXD009157A_2",
                "PXD009465A", "PXD026474A")

for (pd_in_run in pd_in_runs) {
  # Input files are named PD_<run>_PSMSITE.csv.
  pd_in_tab <- read.csv(file = paste0(pd_in_dir, 'PD_', pd_in_run, '_PSMSITE.csv'))

  # The accession is the first nine characters of the run name.
  pd_in_tab$dataset <- substr(pd_in_run, 1, 9)

  # ptmRS score rescaled from percent to a fraction, times (1 - PEP).
  pd_in_tab$PTM_Final_prob <- (pd_in_tab$ptmRS_Score/100)*(1-pd_in_tab$PEP)

  assign(pd_in_run, pd_in_tab)
}

rm(pd_in_dir, pd_in_runs, pd_in_run, pd_in_tab)



# Collapse each Proteome Discoverer table to peptidoform level
# (binAdjustPformPD) and then add the adjusted FLR columns (FLR_AdjPD).
# Results: <run>_pform.
pd_pf_runs <- c("PXD001684A", "PXD002266A", "PXD005207A",
                "PXD009157A_1", "PXD009157A_2",
                "PXD009465A", "PXD026474A")

# plyr is attached only while binAdjustPformPD runs and is detached before
# the FLR step.
library(plyr)

for (pd_pf_run in pd_pf_runs) {
  assign(paste0(pd_pf_run, "_pform"), binAdjustPformPD(get(pd_pf_run)))
}

detach(package:plyr)

for (pd_pf_run in pd_pf_runs) {
  pd_pf_name <- paste0(pd_pf_run, "_pform")
  assign(pd_pf_name, FLR_AdjPD(get(pd_pf_name)))
}

rm(pd_pf_runs, pd_pf_run, pd_pf_name)



# --- Proteome Discoverer: combine runs, cut by FLR, summarise, categorise ----
pd_cut_runs <- c("PXD001684A", "PXD002266A", "PXD005207A",
                 "PXD009157A_1", "PXD009157A_2",
                 "PXD009465A", "PXD026474A")

# All peptidoforms from every run, uncut.
pd_cut_all <- mget(paste0(pd_cut_runs, "_pform"))
AllPlasmodium_pASTY_pform <- dplyr::bind_rows(pd_cut_all)

AllPlasmodium_pASTY_pform_Excluding_A <- AllPlasmodium_pASTY_pform[AllPlasmodium_pASTY_pform$Amino!="A",]

tab1(AllPlasmodium_pASTY_pform_Excluding_A$dataset, graph = FALSE)

# Cut each run at 1%, 2.5% and 5% adjusted FLR: rows from the first row
# through the last row with FLR_Adj_Score at or below the cutoff are kept.
# Results: <run>_pform_01, <run>_pform_02.5, <run>_pform_05.
pd_cut_levels <- c("_01" = 0.01, "_02.5" = 0.025, "_05" = 0.05)

for (pd_cut_suffix in names(pd_cut_levels)) {
  for (pd_cut_run in pd_cut_runs) {
    pd_cut_tab <- get(paste0(pd_cut_run, "_pform"))
    pd_cut_last <- max(which(pd_cut_tab$FLR_Adj_Score <= pd_cut_levels[[pd_cut_suffix]]))
    assign(paste0(pd_cut_run, "_pform", pd_cut_suffix),
           pd_cut_tab[1:pd_cut_last, ])
  }
}

# One combined table per FLR level.
pd_cut_tabs_01 <- mget(paste0(pd_cut_runs, "_pform_01"))
AllPlasmodium_pASTY_pform_01 <- dplyr::bind_rows(pd_cut_tabs_01)

pd_cut_tabs_025 <- mget(paste0(pd_cut_runs, "_pform_02.5"))
AllPlasmodium_pASTY_pform_02.5 <- dplyr::bind_rows(pd_cut_tabs_025)

pd_cut_tabs_05 <- mget(paste0(pd_cut_runs, "_pform_05"))
AllPlasmodium_pASTY_pform_05 <- dplyr::bind_rows(pd_cut_tabs_05)

rm(pd_cut_runs, pd_cut_all, pd_cut_levels, pd_cut_suffix, pd_cut_run,
   pd_cut_tab, pd_cut_last, pd_cut_tabs_01, pd_cut_tabs_025, pd_cut_tabs_05)

# Versions of the cut tables without the rows whose Amino is "A".
AllPlasmodium_pASTY_pform_01_Excluding_A <- AllPlasmodium_pASTY_pform_01[AllPlasmodium_pASTY_pform_01$Amino!="A",]
AllPlasmodium_pASTY_pform_02.5_Excluding_A <- AllPlasmodium_pASTY_pform_02.5[AllPlasmodium_pASTY_pform_02.5$Amino!="A",]
AllPlasmodium_pASTY_pform_05_Excluding_A <- AllPlasmodium_pASTY_pform_05[AllPlasmodium_pASTY_pform_05$Amino!="A",]

# Per-dataset counts at 1%, 2.5% and 5% FLR (tables only, no bar charts).
tab1(AllPlasmodium_pASTY_pform_01_Excluding_A$dataset, graph = FALSE)
tab1(AllPlasmodium_pASTY_pform_02.5_Excluding_A$dataset, graph = FALSE)
tab1(AllPlasmodium_pASTY_pform_05_Excluding_A$dataset, graph = FALSE)

# Copy the adjusted FLR into New_FLR_PEP on the combined tables.
AllPlasmodium_pASTY_pform$New_FLR_PEP <- AllPlasmodium_pASTY_pform$FLR_Adj_Score
AllPlasmodium_pASTY_pform_01$New_FLR_PEP <- AllPlasmodium_pASTY_pform_01$FLR_Adj_Score
AllPlasmodium_pASTY_pform_05$New_FLR_PEP <- AllPlasmodium_pASTY_pform_05$FLR_Adj_Score

# Categorise sites from the 1% and 5% tables.
AllPlasmodium_pASTY_pform_Final <- GSB_Function(AllPlasmodium_pASTY_pform_01,AllPlasmodium_pASTY_pform_05)


# Category x amino-acid contingency table for Proteome Discoverer (printed).
CrossTable(AllPlasmodium_pASTY_pform_Final$cat, AllPlasmodium_pASTY_pform_Final$Amino)

# Hand-entered counts for the stacked bar chart. Each category label carries
# its total; the values are listed amino acid by amino acid in the order
# Gold, Silver, Bronze. The Gold entries for Y and A are NA.
pd_plot_levels <- c("Gold:817", "Silver:3574", "Bronze:5317")
df2 <- data.frame(
  Amino = rep(c("S", "T", "Y", "A"), each = 3),
  Level = rep(pd_plot_levels, times = 4),
  Unique_sites = c(
    749, 3216, 4476,   # S
    68, 350, 691,      # T
    NA, 6, 84,         # Y
    NA, 2, 64          # A
  )
)

pd_plot_aes <- aes(x = Level, y = Unique_sites, fill = Amino)

# Quick look: categories appear in alphabetical order here.
ggplot(data = df2, mapping = pd_plot_aes) +
  geom_bar(stat = "identity")

# Final figure: rows sorted by count and categories forced into
# Gold / Silver / Bronze order.
pd_plot_df <- dplyr::arrange(df2, Unique_sites)
pd_plot_df$Level <- factor(pd_plot_df$Level, levels = pd_plot_levels)

p <- ggplot(pd_plot_df, pd_plot_aes) +
  geom_bar(stat = "identity") +
  xlab("")

pd_plot_labels <- geom_text(aes(label = Unique_sites),
                            position = position_stack(vjust = 0.8), size = 5)
p + pd_plot_labels +
  theme(text = element_text(size = 20))

rm(pd_plot_levels, pd_plot_aes, pd_plot_df, pd_plot_labels)

# Save the Proteome Discoverer categorised site table.
write.csv(AllPlasmodium_pASTY_pform_Final, "D:/Pipeline comparisons/Writing/Data/GSB/Plasmodium/PD_GSB.csv", row.names=FALSE)


# Save every per-run peptidoform table (<run>_pform) as PD_<run>_pform.csv,
# without row names. Building the name from the run guarantees one naming
# scheme for all runs: PXD009465A previously went to
# "PD_PXD009465A_PSMSITE.csv", the name pattern of the input files, so no
# PD_PXD009465A_pform.csv was produced.
pd_out_dir <- "D:/Pipeline comparisons/Writing/Data/PD/Plasmodium/pform/"
pd_out_runs <- c("PXD001684A", "PXD002266A", "PXD005207A",
                 "PXD009157A_1", "PXD009157A_2",
                 "PXD009465A", "PXD026474A")

for (pd_out_run in pd_out_runs) {
  write.csv(get(paste0(pd_out_run, "_pform")),
            paste0(pd_out_dir, "PD_", pd_out_run, "_pform.csv"),
            row.names = FALSE)
}

rm(pd_out_dir, pd_out_runs, pd_out_run)

